1
2
3
4
5
6
7
8
9
10
11 """Restriction Enzyme classes.
12
13 Notes about the various classes of the restriction enzyme implementation::
14
15 RestrictionType is the type of all restriction enzymes.
16 ----------------------------------------------------------------------------
17 AbstractCut implements some methods that are common to all enzymes.
18 ----------------------------------------------------------------------------
19 NoCut, OneCut,TwoCuts represent the number of double strand cuts
20 produced by the enzyme.
21 they correspond to the 4th field of the
22 rebase record emboss_e.NNN.
23 0->NoCut : the enzyme is not characterised.
24 2->OneCut : the enzyme produces one double strand cut.
25 4->TwoCuts : two double strand cuts.
26 ----------------------------------------------------------------------------
27 Meth_Dep, Meth_Undep represent the methylation susceptibility to
28 the enzyme.
29 Not implemented yet.
30 ----------------------------------------------------------------------------
31 Palindromic, if the site is palindromic or not.
32 NotPalindromic allow some optimisations of the code.
33 No need to check the reverse strand
34 with palindromic sites.
35 ----------------------------------------------------------------------------
36 Unknown, Blunt, represent the overhang.
37 Ov5, Ov3 Unknown is here for symmetry reasons and
38 correspond to enzymes that are not
39 characterised in rebase.
40 ----------------------------------------------------------------------------
41 Defined, Ambiguous, represent the sequence of the overhang.
42 NotDefined
43 NotDefined is for enzymes not characterised
44 in rebase.
45
46 Defined correspond to enzymes that display
47 a constant overhang whatever the sequence.
48 ex : EcoRI. G^AATTC -> overhang :AATT
49 CTTAA^G
50
51 Ambiguous : the overhang varies with the
52 sequence restricted.
53 Typically enzymes which cut outside their
54 restriction site or (but not always)
55 inside an ambiguous site.
56 ex:
57 AcuI CTGAAG(22/20) -> overhang : NN
58 AasI GACNNN^NNNGTC -> overhang : NN
59 CTGN^NNNNNCAG
60
61 note : these 3 classes refer to the overhang, not the site.
62 So the enzyme ApoI (RAATTY) is defined even if its
63 restriction site is ambiguous.
64
65 ApoI R^AATTY -> overhang : AATT -> Defined
66 YTTAA^R
67 Accordingly, blunt enzymes are always Defined even
68 when they cut outside their restriction site.
69 ----------------------------------------------------------------------------
70 Not_available, as found in rebase file emboss_r.NNN files.
71 Commercially_available
72 allow the selection of the enzymes
73 according to their suppliers to reduce the
74 quantity of results.
75 Also will allow the implementation of
76 buffer compatibility tables. Not
77 implemented yet.
78
79 the list of suppliers is extracted from
80 emboss_s.NNN
81 ----------------------------------------------------------------------------
82 """
83
84 from __future__ import print_function
85
86 import warnings
87
88 from Bio._py3k import zip
89 from Bio._py3k import filter
90 from Bio._py3k import range
91
92 import re
93 import itertools
94
95 from Bio.Seq import Seq, MutableSeq
96
97 from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict
98 from Bio.Restriction.Restriction_Dictionary import typedict
99 from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict
100 from Bio.Restriction.RanaConfig import ConsoleWidth, NameWidth, Indent, MaxSize
101 from Bio.Restriction.RanaConfig import ftp_proxy, ftp_Rebase
102 from Bio.Restriction.RanaConfig import ftp_emb_e, ftp_emb_s, ftp_emb_r
103 from Bio.Restriction.PrintFormat import PrintFormat
104 from Bio import BiopythonWarning
112 """Check characters in a string (PRIVATE).
113
114 Remove digits and white space present in string. Allows any valid ambiguous
115 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted).
116
117 Other characters (e.g. symbols) trigger a TypeError.
118
119 Returns the string WITH A LEADING SPACE (!). This is for backwards
120 compatibility, and may in part be explained by the fact that
121 Bio.Restriction doesn't use zero based counting.
122 """
123
124 seq_string = "".join(seq_string.split()).upper()
125
126 for c in "0123456789":
127 seq_string = seq_string.replace(c, "")
128
129 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")):
130 raise TypeError("Invalid character found in %s" % repr(seq_string))
131 return " " + seq_string
132
133
# Lookup table keyed by single-letter IUPAC nucleotide code. Each value is a
# string of IUPAC codes associated with the key; e.g. the entry for 'A' lists
# exactly the codes whose base set contains A (A, R, W, M, H, V, D, N).
# NOTE(review): presumably used to decide whether two (possibly ambiguous)
# bases can match each other -- the consumer is not visible here, confirm.
matching = {'A': 'ARWMHVDN', 'C': 'CYSMHBVN', 'G': 'GRSKBVDN',
            'T': 'TYWKHBDN', 'R': 'ABDGHKMNSRWV', 'Y': 'CBDHKMNSTWVY',
            'W': 'ABDHKMNRTWVY', 'S': 'CBDGHKMNSRVY', 'M': 'ACBDHMNSRWVY',
            'K': 'BDGHKNSRTWVY', 'H': 'ACBDHKMNSRTWVY',
            'B': 'CBDGHKMNSRTWVY', 'V': 'ACBDGHKMNSRWVY',
            'D': 'ABDGHKMNSRTWVY', 'N': 'ACBDGHKMNSRTWVY'}

# Module-level alias: DNA is the Bio.Seq.Seq class itself.
DNA = Seq
246
249 """RestrictionType. Type from which derives all enzyme classes.
250
251 Implement the operator methods.
252 """
253
def __init__(cls, name='', bases=(), dct=None):
    """RE(name, bases, dct) -> RestrictionType instance.

    Initialise a freshly created enzyme class. Not intended to be used in
    normal operation: the enzymes are instantiated when the module is
    imported.

    Arguments:
     - name: name of the enzyme class; must not contain a hyphen.
     - bases, dct: accepted for the metaclass protocol, not used here.

    The ``compsite`` attribute of the class is replaced by its compiled
    regular expression.

    Raises ValueError if the name contains a hyphen or if ``compsite``
    cannot be compiled.
    """
    if "-" in name:
        raise ValueError("Problem with hyphen in %s as enzyme name"
                         % repr(name))
    try:
        # Swap the pattern source for the compiled pattern object, in place.
        cls.compsite = re.compile(cls.compsite)
    except Exception:
        # Any failure is reported uniformly as a ValueError naming the
        # offending pattern.
        raise ValueError("Problem with regular expression, re.compiled(%s)"
                         % repr(cls.compsite))
272
285
287 """RE.__div__(other) -> list.
288
289 RE/other
290 returns RE.search(other)."""
291 return cls.search(other)
292
294 """RE.__rdiv__(other) -> list.
295
296 other/RE
297 returns RE.search(other)."""
298 return cls.search(other)
299
301 """RE.__truediv__(other) -> list.
302
303 RE/other
304 returns RE.search(other)."""
305 return cls.search(other)
306
308 """RE.__rtruediv__(other) -> list.
309
310 other/RE
311 returns RE.search(other)."""
312 return cls.search(other)
313
315 """RE.__floordiv__(other) -> list.
316
317 RE//other
318 returns RE.catalyse(other)."""
319 return cls.catalyse(other)
320
322 """RE.__rfloordiv__(other) -> list.
323
324 other//RE
325 returns RE.catalyse(other)."""
326 return cls.catalyse(other)
327
329 """RE.__str__() -> str.
330
331 return the name of the enzyme."""
332 return cls.__name__
333
335 """RE.__repr__() -> str.
336
337 used with eval or exec will instantiate the enzyme."""
338 return "%s" % cls.__name__
339
341 """RE.__len__() -> int.
342
343 length of the recognition site."""
344 return cls.size
345
347
348
349 return id(cls)
350
352 """RE == other -> bool
353
354 True if RE and other are the same enzyme.
355
356 Specifically this checks they are the same Python object.
357 """
358
359 return id(cls) == id(other)
360
362 """RE != other -> bool.
363 isoschizomer strict, same recognition site, same restriction -> False
364 all the other-> True
365
366 WARNING - This is not the inverse of the __eq__ method.
367 """
368 if not isinstance(other, RestrictionType):
369 return True
370 elif cls.charac == other.charac:
371 return False
372 else:
373 return True
374
376 """RE >> other -> bool.
377
378 neoschizomer : same recognition site, different restriction. -> True
379 all the others : -> False
380 """
381 if not isinstance(other, RestrictionType):
382 return False
383 elif cls.site == other.site and cls.charac != other.charac:
384 return True
385 else:
386 return False
387
389 """a % b -> bool.
390
391 Test compatibility of the overhang of a and b.
392 True if a and b have compatible overhang.
393 """
394 if not isinstance(other, RestrictionType):
395 raise TypeError(
396 'expected RestrictionType, got %s instead' % type(other))
397 return cls._mod1(other)
398
400 """a >= b -> bool.
401
402 a is greater or equal than b if the a site is longer than b site.
403 if their site have the same length sort by alphabetical order of their
404 names."""
405 if not isinstance(other, RestrictionType):
406 raise NotImplementedError
407 if len(cls) > len(other):
408 return True
409 elif cls.size == len(other) and cls.__name__ >= other.__name__:
410 return True
411 else:
412 return False
413
415 """a > b -> bool.
416
417 sorting order:
418 1. size of the recognition site.
419 2. if equal size, alphabetical order of the names."""
420 if not isinstance(other, RestrictionType):
421 raise NotImplementedError
422 if len(cls) > len(other):
423 return True
424 elif cls.size == len(other) and cls.__name__ > other.__name__:
425 return True
426 else:
427 return False
428
430 """a <= b -> bool.
431
432 sorting order:
433 1. size of the recognition site.
434 2. if equal size, alphabetical order of the names.
435 """
436 if not isinstance(other, RestrictionType):
437 raise NotImplementedError
438 elif len(cls) < len(other):
439 return True
440 elif len(cls) == len(other) and cls.__name__ <= other.__name__:
441 return True
442 else:
443 return False
444
446 """a < b -> bool.
447
448 sorting order:
449 1. size of the recognition site.
450 2. if equal size, alphabetical order of the names.
451 """
452 if not isinstance(other, RestrictionType):
453 raise NotImplementedError
454 elif len(cls) < len(other):
455 return True
456 elif len(cls) == len(other) and cls.__name__ < other.__name__:
457 return True
458 else:
459 return False
460
463 """Implement the methods that are common to all restriction enzymes.
464
465 All the methods are classmethod.
466
467 For internal use only. Not meant to be instantiate.
468 """
469
@classmethod
def search(cls, dna, linear=True):
    """RE.search(dna, linear=True) -> list.

    Return a list of all the sites of RE in dna, as computed by
    RE._search().

    dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance;
    it is wrapped in a FormattedSeq before searching. A FormattedSeq is
    used as given, in which case the linear argument is not consulted.

    if linear is False, the restriction sites that span over the
    boundaries will be included.

    The positions are the first base of the 3' fragment,
    i.e. the first base after the position the enzyme will cut.

    Side effect: the sequence searched is stored on the class as RE.dna.
    """
    if not isinstance(dna, FormattedSeq):
        dna = FormattedSeq(dna, linear)
    cls.dna = dna
    return cls._search()
498
499 @classmethod
501 """RE.all_suppliers -> print all the suppliers of R"""
502 supply = sorted(x[0] for x in suppliers_dict.values())
503 print(",\n".join(supply))
504 return
505
506 @classmethod
508 """RE.is_equischizomers(other) -> bool.
509
510 True if other is an isoschizomer of RE.
511 False else.
512
513 equischizomer <=> same site, same position of restriction.
514 """
515 return not cls != other
516
517 @classmethod
519 """RE.is_neoschizomers(other) -> bool.
520
521 True if other is an isoschizomer of RE.
522 False else.
523
524 neoschizomer <=> same site, different position of restriction.
525 """
526 return cls >> other
527
528 @classmethod
530 """RE.is_isoschizomers(other) -> bool.
531
532 True if other is an isoschizomer of RE.
533 False else.
534
535 isoschizomer <=> same site."""
536 return (not cls != other) or cls >> other
537
538 @classmethod
540 """RE.equischizomers([batch]) -> list.
541
542 return a tuple of all the isoschizomers of RE.
543 if batch is supplied it is used instead of the default AllEnzymes.
544
545 equischizomer <=> same site, same position of restriction.
546 """
547 if not batch:
548 batch = AllEnzymes
549 r = [x for x in batch if not cls != x]
550 i = r.index(cls)
551 del r[i]
552 r.sort()
553 return r
554
555 @classmethod
557 """RE.neoschizomers([batch]) -> list.
558
559 return a tuple of all the neoschizomers of RE.
560 if batch is supplied it is used instead of the default AllEnzymes.
561
562 neoschizomer <=> same site, different position of restriction."""
563 if not batch:
564 batch = AllEnzymes
565 r = sorted(x for x in batch if cls >> x)
566 return r
567
568 @classmethod
570 """RE.isoschizomers([batch]) -> list.
571
572 return a tuple of all the equischizomers and neoschizomers of RE.
573 if batch is supplied it is used instead of the default AllEnzymes.
574 """
575 if not batch:
576 batch = AllEnzymes
577 r = [x for x in batch if (cls >> x) or (not cls != x)]
578 i = r.index(cls)
579 del r[i]
580 r.sort()
581 return r
582
583 @classmethod
585 """RE.frequency() -> int.
586
587 frequency of the site."""
588 return cls.freq
589
590
class NoCut(AbstractCut):
    """Implement the methods specific to the enzymes that do not cut.

    These enzymes are generally enzymes that have been only partially
    characterised and the way they cut the DNA is unknown, or enzymes for
    which the pattern of cut is too complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    When using search() with these enzymes the values returned are at the
    start of the restriction site.

    Unknown and NotDefined are also part of the base classes of these
    enzymes; Unknown.catalyse() raises NotImplementedError.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cut the sequence one time on each strand.
        Always False for this class.
        """
        return False

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cut the sequence twice on each strand.
        Always False for this class.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence. No cut offset is applied
        here: the location itself is the single value yielded.
        """
        yield location

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        as _modify for site situated on the antiparallel strand when the
        enzyme is not palindromic; the location is yielded unchanged.
        """
        yield location

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        All four cut positions are None for this class.
        """
        return (None, None, None, None, cls.site)
674
675
class OneCut(AbstractCut):
    """Implement the methods specific to the enzymes that cut the DNA once.

    Correspond to ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cut the sequence one time on each strand.
        Always True for this class.
        """
        return True

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cut the sequence twice on each strand.
        Always False for this class.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence.
        _modify yields the real place where the enzyme will cut, i.e. the
        location shifted by RE.fst5.

        example::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.
        """
        cut_position = location + cls.fst5
        yield cut_position

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        as _modify for site situated on the antiparallel strand when the
        enzyme is not palindromic; yields the location shifted back by
        RE.fst3.
        """
        cut_position = location - cls.fst3
        yield cut_position

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        The two second-cut entries are None for this class.
        """
        return (cls.fst5, cls.fst3, None, None, cls.site)
751
754 """Implement the methods specific to the enzymes that cut the DNA twice
755
756 Correspond to ncuts values of 4 in emboss_e.###
757
758 Internal use only. Not meant to be instantiated."""
759
760 @classmethod
762 """RE.cut_once() -> bool.
763
764 True if the enzyme cut the sequence one time on each strand."""
765 return False
766
767 @classmethod
769 """RE.cut_twice() -> bool.
770
771 True if the enzyme cut the sequence twice on each strand.
772 """
773 return True
774
775 @classmethod
777 """RE._modify(location) -> int.
778
779 for internal use only.
780
781 location is an integer corresponding to the location of the match for
782 the enzyme pattern in the sequence.
783 _modify returns the real place where the enzyme will cut.
784
785 example::
786
787 EcoRI pattern : GAATTC
788 EcoRI will cut after the G.
789 so in the sequence:
790 ______
791 GAATACACGGAATTCGA
792 |
793 10
794 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
795 EcoRI cut after the G so:
796 EcoRI._modify(10) -> 11.
797
798 if the enzyme cut twice _modify will returns two integer corresponding
799 to each cutting site.
800 """
801 yield location + cls.fst5
802 yield location + cls.scd5
803
804 @classmethod
806 """RE._rev_modify(location) -> generator of int.
807
808 for internal use only.
809
810 as _modify for site situated on the antiparallel strand when the
811 enzyme is not palindromic
812 """
813 yield location - cls.fst3
814 yield location - cls.scd3
815
816 @classmethod
818 """RE.characteristic() -> tuple.
819
820 the tuple contains the attributes:
821 fst5 -> first 5' cut ((current strand) or None
822 fst3 -> first 3' cut (complementary strand) or None
823 scd5 -> second 5' cut (current strand) or None
824 scd5 -> second 3' cut (complementary strand) or None
825 site -> recognition site.
826 """
827 return cls.fst5, cls.fst3, cls.scd5, cls.scd3, cls.site
828
831 """Implement the information about methylation.
832
833 Enzymes of this class possess a site which is methylable.
834 """
835
836 @classmethod
838 """RE.is_methylable() -> bool.
839
840 True if the recognition site is a methylable.
841 """
842 return True
843
846 """Implement information about methylation sensitibility.
847
848 Enzymes of this class are not sensible to methylation.
849 """
850
851 @classmethod
853 """RE.is_methylable() -> bool.
854
855 True if the recognition site is a methylable.
856 """
857 return False
858
861 """Implement the methods specific to the enzymes which are palindromic
862
863 palindromic means : the recognition site and its reverse complement are
864 identical.
865 Remarks : an enzyme with a site CGNNCG is palindromic even if some
866 of the sites that it will recognise are not.
867 for example here : CGAACG
868
869 Internal use only. Not meant to be instantiated."""
870
871 @classmethod
873 """RE._search() -> list.
874
875 for internal use only.
876
877 implement the search method for palindromic and non palindromic enzyme.
878 """
879 siteloc = cls.dna.finditer(cls.compsite, cls.size)
880 cls.results = [r for s, g in siteloc for r in cls._modify(s)]
881 if cls.results:
882 cls._drop()
883 return cls.results
884
885 @classmethod
887 """RE.is_palindromic() -> bool.
888
889 True if the recognition site is a palindrom.
890 """
891 return True
892
895 """Implement the methods specific to the enzymes which are not palindromic
896
897 palindromic means : the recognition site and its reverse complement are
898 identical.
899
900 Internal use only. Not meant to be instantiated."""
901
902 @classmethod
904 """RE._search() -> list.
905
906 for internal use only.
907
908 implement the search method for palindromic and non palindromic enzyme.
909 """
910 iterator = cls.dna.finditer(cls.compsite, cls.size)
911 cls.results = []
912 modif = cls._modify
913 revmodif = cls._rev_modify
914 s = str(cls)
915 cls.on_minus = []
916 for start, group in iterator:
917 if group(s):
918 cls.results += [r for r in modif(start)]
919 else:
920 cls.on_minus += [r for r in revmodif(start)]
921 cls.results += cls.on_minus
922 if cls.results:
923 cls.results.sort()
924 cls._drop()
925 return cls.results
926
927 @classmethod
929 """RE.is_palindromic() -> bool.
930
931 True if the recognition site is a palindrom.
932 """
933 return False
934
937 """Implement the methods specific to the enzymes for which the overhang
938 is unknown.
939
940 These enzymes are also NotDefined and NoCut.
941
942 Internal use only. Not meant to be instantiated.
943 """
944
945 @classmethod
947 """RE.catalyse(dna, linear=True) -> tuple of DNA.
948 RE.catalyze(dna, linear=True) -> tuple of DNA.
949
950 return a tuple of dna as will be produced by using RE to restrict the
951 dna.
952
953 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
954
955 if linear is False, the sequence is considered to be circular and the
956 output will be modified accordingly.
957 """
958 raise NotImplementedError('%s restriction is unknown.'
959 % cls.__name__)
960 catalyze = catalyse
961
962 @classmethod
964 """RE.is_blunt() -> bool.
965
966 True if the enzyme produces blunt end.
967
968 see also:
969 RE.is_3overhang()
970 RE.is_5overhang()
971 RE.is_unknown()
972 """
973 return False
974
975 @classmethod
977 """RE.is_5overhang() -> bool.
978
979 True if the enzyme produces 5' overhang sticky end.
980
981 see also:
982 RE.is_3overhang()
983 RE.is_blunt()
984 RE.is_unknown()
985 """
986 return False
987
988 @classmethod
990 """RE.is_3overhang() -> bool.
991
992 True if the enzyme produces 3' overhang sticky end.
993
994 see also:
995 RE.is_5overhang()
996 RE.is_blunt()
997 RE.is_unknown()
998 """
999 return False
1000
1001 @classmethod
1003 """RE.overhang() -> str. type of overhang of the enzyme.,
1004
1005 can be "3' overhang", "5' overhang", "blunt", "unknown"
1006 """
1007 return 'unknown'
1008
1009 @classmethod
1011 """RE.compatible_end() -> list.
1012
1013 list of all the enzymes that share compatible end with RE.
1014 """
1015 return []
1016
1017 @classmethod
1019 """RE._mod1(other) -> bool.
1020
1021 for internal use only
1022
1023 test for the compatibility of restriction ending of RE and other.
1024 """
1025 return False
1026
1027
class Blunt(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is blunt.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict
        the dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and
        the output will be modified accordingly.
        """
        cuts = cls.search(dna, linear)
        # search() leaves the (formatted) sequence it worked on in cls.dna.
        seq = cls.dna
        if not cuts:
            # Uncut: the whole sequence is the only fragment. Slices start
            # at 1, not 0 (NOTE(review): presumably the formatted sequence
            # is 1-based -- confirm against FormattedSeq).
            return seq[1:],
        # Fragments lying between two consecutive cut positions.
        inner = [seq[cuts[i]:cuts[i + 1]] for i in range(len(cuts) - 1)]
        if seq.is_linear():
            # Linear: leading fragment, inner fragments, trailing fragment.
            return tuple([seq[1:cuts[0]]] + inner + [seq[cuts[-1]:]])
        # Circular: the piece after the last cut is joined to the piece
        # before the first cut, followed by the inner fragments.
        return tuple([seq[cuts[-1]:] + seq[1:cuts[0]]] + inner)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()
        """
        return True

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown"
        """
        return 'blunt'

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible end
        with RE, i.e. the blunt cutters.
        if batch is not supplied (or is empty) AllEnzymes is used.
        """
        if not batch:
            batch = AllEnzymes
        # Iterate the selected batch; previously AllEnzymes was always
        # scanned and a caller-supplied batch was silently ignored.
        return sorted(x for x in batch if x.is_blunt())

    @staticmethod
    def _mod1(other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other:
        True when other is also a Blunt enzyme class.
        """
        return issubclass(other, Blunt)
1155
1156
class Ov5(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 3'.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict
        the dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and
        the output will be modified accordingly.
        """
        cuts = cls.search(dna, linear)
        # search() leaves the (formatted) sequence it worked on in cls.dna.
        seq = cls.dna
        if not cuts:
            # Uncut: the whole sequence is the only fragment. Slices start
            # at 1, not 0 (NOTE(review): presumably the formatted sequence
            # is 1-based -- confirm against FormattedSeq).
            return seq[1:],
        # Fragments lying between two consecutive cut positions.
        inner = [seq[cuts[i]:cuts[i + 1]] for i in range(len(cuts) - 1)]
        if seq.is_linear():
            # Linear: leading fragment, inner fragments, trailing fragment.
            return tuple([seq[1:cuts[0]]] + inner + [seq[cuts[-1]:]])
        # Circular: the piece after the last cut is joined to the piece
        # before the first cut, followed by the inner fragments.
        return tuple([seq[cuts[-1]:] + seq[1:cuts[0]]] + inner)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return True

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown"
        """
        return "5' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible end
        with RE: 5' overhang enzymes x for which x % RE is true.
        if batch is not supplied (or is empty) AllEnzymes is used.
        """
        if not batch:
            batch = AllEnzymes
        # Iterate the selected batch; previously AllEnzymes was always
        # scanned and a caller-supplied batch was silently ignored.
        return sorted(x for x in batch if x.is_5overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another Ov5 enzyme can be compatible; the comparison of the
        overhangs is delegated to RE._mod2(other).
        """
        if not issubclass(other, Ov5):
            return False
        return cls._mod2(other)
1286
1287
class Ov3(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 5'.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict
        the dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and
        the output will be modified accordingly.
        """
        cuts = cls.search(dna, linear)
        # search() leaves the (formatted) sequence it worked on in cls.dna.
        seq = cls.dna
        if not cuts:
            # Uncut: the whole sequence is the only fragment. Slices start
            # at 1, not 0 (NOTE(review): presumably the formatted sequence
            # is 1-based -- confirm against FormattedSeq).
            return seq[1:],
        # Fragments lying between two consecutive cut positions.
        inner = [seq[cuts[i]:cuts[i + 1]] for i in range(len(cuts) - 1)]
        if seq.is_linear():
            # Linear: leading fragment, inner fragments, trailing fragment.
            return tuple([seq[1:cuts[0]]] + inner + [seq[cuts[-1]:]])
        # Circular: the piece after the last cut is joined to the piece
        # before the first cut, followed by the inner fragments.
        return tuple([seq[cuts[-1]:] + seq[1:cuts[0]]] + inner)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return True

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown"
        """
        return "3' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible end
        with RE: 3' overhang enzymes x for which x % RE is true.
        if batch is not supplied (or is empty) AllEnzymes is used.
        """
        if not batch:
            batch = AllEnzymes
        # Iterate the selected batch; previously AllEnzymes was always
        # scanned and a caller-supplied batch was silently ignored.
        return sorted(x for x in batch if x.is_3overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another Ov3 enzyme can be compatible; the comparison of the
        overhangs is delegated to RE._mod2(other).
        """
        if not issubclass(other, Ov3):
            return False
        return cls._mod2(other)
1421
1424 """Implement the methods specific to the enzymes for which the overhang
1425 and the cut are not variable.
1426
1427 Typical example : EcoRI -> G^AATT_C
1428 The overhang will always be AATT
1429 Notes:
1430 Blunt enzymes are always defined. even if there site is GGATCCNNN^_N
1431 There overhang is always the same : blunt!
1432
1433 Internal use only. Not meant to be instantiated."""
1434
1435 @classmethod
1470
1471 @classmethod
1473 """RE.is_defined() -> bool.
1474
1475 True if the sequence recognised and cut is constant,
1476 i.e. the recognition site is not degenerated AND the enzyme cut inside
1477 the site.
1478
1479 see also:
1480 RE.is_ambiguous()
1481 RE.is_unknown()
1482 """
1483 return True
1484
1485 @classmethod
1487 """RE.is_ambiguous() -> bool.
1488
1489 True if the sequence recognised and cut is ambiguous,
1490 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1491 the site.
1492
1493 see also:
1494 RE.is_defined()
1495 RE.is_unknown()
1496 """
1497 return False
1498
1499 @classmethod
1501 """RE.is_unknown() -> bool.
1502
1503 True if the sequence is unknown,
1504 i.e. the recognition site has not been characterised yet.
1505
1506 see also:
1507 RE.is_defined()
1508 RE.is_ambiguous()
1509 """
1510 return False
1511
1512 @classmethod
1514 """RE.elucidate() -> str
1515
1516 return a representation of the site with the cut on the (+) strand
1517 represented as '^' and the cut on the (-) strand as '_'.
1518 ie:
1519 >>> EcoRI.elucidate() # 5' overhang
1520 'G^AATT_C'
1521 >>> KpnI.elucidate() # 3' overhang
1522 'G_GTAC^C'
1523 >>> EcoRV.elucidate() # blunt
1524 'GAT^_ATC'
1525 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1526 '? GTATAC ?'
1527 >>>
1528 """
1529 f5 = cls.fst5
1530 f3 = cls.fst3
1531 site = cls.site
1532 if cls.cut_twice():
1533 re = 'cut twice, not yet implemented sorry.'
1534 elif cls.is_5overhang():
1535 if f5 == f3 == 0:
1536 re = 'N^' + cls.site + '_N'
1537 elif f3 == 0:
1538 re = site[:f5] + '^' + site[f5:] + '_N'
1539 else:
1540 re = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
1541 elif cls.is_blunt():
1542 re = site[:f5] + '^_' + site[f5:]
1543 else:
1544 if f5 == f3 == 0:
1545 re = 'N_' + site + '^N'
1546 else:
1547 re = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
1548 return re
1549
1550 @classmethod
1552 """RE._mod2(other) -> bool.
1553
1554 for internal use only
1555
1556 test for the compatibility of restriction ending of RE and other.
1557 """
1558
1559
1560
1561 if other.ovhgseq == cls.ovhgseq:
1562 return True
1563 elif issubclass(other, Ambiguous):
1564 return other._mod2(cls)
1565 else:
1566 return False
1567
1570 """Implement the methods specific to the enzymes for which the overhang
1571 is variable.
1572
1573 Typical example : BstXI -> CCAN_NNNN^NTGG
1574 The overhang can be any sequence of 4 bases.
1575 Notes:
1576 Blunt enzymes are always defined. even if there site is GGATCCNNN^_N
1577 There overhang is always the same : blunt!
1578
1579 Internal use only. Not meant to be instantiated.
1580 """
1581
1582 @classmethod
1610
1611 @classmethod
1613 """RE.is_defined() -> bool.
1614
1615 True if the sequence recognised and cut is constant,
1616 i.e. the recognition site is not degenerated AND the enzyme cut inside
1617 the site.
1618
1619 see also:
1620 RE.is_ambiguous()
1621 RE.is_unknown()
1622 """
1623 return False
1624
1625 @classmethod
1627 """RE.is_ambiguous() -> bool.
1628
1629 True if the sequence recognised and cut is ambiguous,
1630 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1631 the site.
1632
1633 see also:
1634 RE.is_defined()
1635 RE.is_unknown()
1636 """
1637 return True
1638
1639 @classmethod
1641 """RE.is_unknown() -> bool.
1642
1643 True if the sequence is unknown,
1644 i.e. the recognition site has not been characterised yet.
1645
1646 see also:
1647 RE.is_defined()
1648 RE.is_ambiguous()
1649 """
1650 return False
1651
1652 @classmethod
1654 """RE._mod2(other) -> bool.
1655
1656 for internal use only
1657
1658 test for the compatibility of restriction ending of RE and other.
1659 """
1660
1661
1662
1663 if len(cls.ovhgseq) != len(other.ovhgseq):
1664 return False
1665 else:
1666 se = cls.ovhgseq
1667 for base in se:
1668 if base in 'ATCG':
1669 pass
1670 if base in 'N':
1671 se = '.'.join(se.split('N'))
1672 if base in 'RYWMSKHDBV':
1673 expand = '[' + matching[base] + ']'
1674 se = expand.join(se.split(base))
1675 if re.match(se, other.ovhgseq):
1676 return True
1677 else:
1678 return False
1679
@classmethod
def elucidate(cls):
    """RE.elucidate() -> str

    return a representation of the site with the cut on the (+) strand
    represented as '^' and the cut on the (-) strand as '_'.
    Cut positions that fall outside of the recognition site are drawn
    with 'N' padding on the relevant side of the site.
    ie:
    >>> EcoRI.elucidate()   # 5' overhang
    'G^AATT_C'
    >>> KpnI.elucidate()    # 3' overhang
    'G_GTAC^C'
    >>> EcoRV.elucidate()   # blunt
    'GAT^_ATC'
    >>> SnaI.elucidate()     # NotDefined, cut profile unknown.
    '? GTATAC ?'
    >>>

    Raise ValueError for a blunt cutter whose fst5 lies inside the site
    (0 <= fst5 <= len(RE)), a case this representation does not handle.
    """
    f5 = cls.fst5
    f3 = cls.fst3
    length = len(cls)
    site = cls.site
    # The result is called `rep` rather than `re` so the regular expression
    # module is not shadowed inside this method.
    if cls.cut_twice():
        rep = 'cut twice, not yet implemented sorry.'
    elif cls.is_5overhang():
        if f3 == f5 == 0:
            rep = 'N^' + site + '_N'
        elif 0 <= f5 <= length and 0 <= f3 + length <= length:
            rep = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
        elif 0 <= f5 <= length:
            rep = site[:f5] + '^' + site[f5:] + f3 * 'N' + '_N'
        elif 0 <= f3 + length <= length:
            rep = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
        elif f3 + length < 0:
            # Was "'N^' * abs(f5) * 'N'", i.e. str * int * str, which
            # raises TypeError; the prefix must be concatenated.
            rep = 'N^' + abs(f5) * 'N' + '_' + abs(length + f3) * 'N' + site
        elif f5 > length:
            rep = (site + (f5 - length) * 'N' + '^' +
                   (length + f3 - f5) * 'N' + '_N')
        else:
            rep = 'N^' + abs(f5) * 'N' + site + f3 * 'N' + '_N'
    elif cls.is_blunt():
        if f5 < 0:
            rep = 'N^_' + abs(f5) * 'N' + site
        elif f5 > length:
            rep = site + (f5 - length) * 'N' + '^_N'
        else:
            # The message used to name a non-existent 'easyrepr' method.
            raise ValueError('%s.elucidate() : error f5=%i'
                             % (cls.name, f5))
    else:
        # Remaining case: 3' overhang.
        if f3 == 0:
            if f5 == 0:
                rep = 'N_' + site + '^N'
            else:
                rep = site + '_' + (f5 - length) * 'N' + '^N'
        elif 0 < f3 + length <= length and 0 <= f5 <= length:
            rep = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
        elif 0 < f3 + length <= length:
            rep = site[:f3] + '_' + site[f3:] + (f5 - length) * 'N' + '^N'
        elif 0 <= f5 <= length:
            rep = 'N_' + 'N' * (f3 + length) + site[:f5] + '^' + site[f5:]
        elif f3 > 0:
            rep = site + f3 * 'N' + '_' + (f5 - f3 - length) * 'N' + '^N'
        elif f5 < 0:
            rep = ('N_' + abs(f3 - f5 + length) * 'N' + '^' +
                   abs(f5) * 'N' + site)
        else:
            rep = ('N_' + abs(f3 + length) * 'N' + site +
                   (f5 - length) * 'N' + '^N')
    return rep
1748
1751 """Implement the methods specific to the enzymes for which the overhang
1752 is not characterised.
1753
1754 Correspond to NoCut and Unknown.
1755
1756 Internal use only. Not meant to be instantiated.
1757 """
1758
1759 @classmethod
1783
1784 @classmethod
1786 """RE.is_defined() -> bool.
1787
1788 True if the sequence recognised and cut is constant,
1789 i.e. the recognition site is not degenerated AND the enzyme cut inside
1790 the site.
1791
1792 see also:
1793 RE.is_ambiguous()
1794 RE.is_unknown()
1795 """
1796 return False
1797
1798 @classmethod
1800 """RE.is_ambiguous() -> bool.
1801
1802 True if the sequence recognised and cut is ambiguous,
1803 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1804 the site.
1805
1806 see also:
1807 RE.is_defined()
1808 RE.is_unknown()
1809 """
1810 return False
1811
1812 @classmethod
1814 """RE.is_unknown() -> bool.
1815
1816 True if the sequence is unknown,
1817 i.e. the recognition site has not been characterised yet.
1818
1819 see also:
1820 RE.is_defined()
1821 RE.is_ambiguous()"""
1822 return True
1823
1824 @classmethod
1826 """RE._mod2(other) -> bool.
1827
1828 for internal use only
1829
1830 test for the compatibility of restriction ending of RE and other.
1831 """
1832
1833
1834
1835
1836
1837
1838
1839 raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
1840 % (str(cls), str(other), str(cls)))
1841
1842 @classmethod
1844 """RE.elucidate() -> str
1845
1846 return a representation of the site with the cut on the (+) strand
1847 represented as '^' and the cut on the (-) strand as '_'.
1848 ie:
1849 >>> EcoRI.elucidate() # 5' overhang
1850 'G^AATT_C'
1851 >>> KpnI.elucidate() # 3' overhang
1852 'G_GTAC^C'
1853 >>> EcoRV.elucidate() # blunt
1854 'GAT^_ATC'
1855 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1856 '? GTATAC ?'
1857 >>>
1858 """
1859 return '? %s ?' % cls.site
1860
1863
1864
1865
1866
1867 """Implement the methods specific to the enzymes which are commercially
1868 available.
1869
1870 Internal use only. Not meant to be instantiated.
1871 """
1872
1873 @classmethod
1875 """RE.suppliers() -> print the suppliers of RE."""
1876 for s in cls.suppl:
1877 print(suppliers_dict[s][0] + ',')
1878 return
1879
1880 @classmethod
1882 """RE.supplier_list() -> list.
1883
1884 list of the supplier names for RE.
1885 """
1886 return [v[0] for k, v in suppliers_dict.items() if k in cls.suppl]
1887
1888 @classmethod
1890 """RE.buffers(supplier) -> string.
1891
1892 not implemented yet.
1893 """
1894 return
1895
1896 @classmethod
1898 """RE.iscomm() -> bool.
1899
1900 True if RE has suppliers.
1901 """
1902 return True
1903
1906 """Implement the methods specific to the enzymes which are not commercially
1907 available.
1908
1909 Internal use only. Not meant to be instantiated.
1910 """
1911
1912 @staticmethod
1914 """RE.suppliers() -> print the suppliers of RE."""
1915 return None
1916
1917 @classmethod
1919 """RE.supplier_list() -> list.
1920
1921 list of the supplier names for RE.
1922 """
1923 return []
1924
1925 @classmethod
1927 """RE.buffers(supplier) -> string.
1928
1929 not implemented yet.
1930 """
1931 raise TypeError("Enzyme not commercially available.")
1932
1933 @classmethod
1935 """RE.iscomm() -> bool.
1936
1937 True if RE has suppliers.
1938 """
1939 return False
1940
1950
def __init__(self, first=(), suppliers=()):
    """RestrictionBatch([sequence[, suppliers]]) -> new RestrictionBatch.

    first     -- iterable of enzymes; each one is passed through
                 self.format() before being stored.
    suppliers -- iterable of supplier codes (keys of suppliers_dict);
                 every enzyme listed for these suppliers is added too.
                 An unknown code raises KeyError.
    """
    # Materialise once so a one-shot iterator can be read twice below.
    suppliers = list(suppliers)
    first = [self.format(x) for x in first]
    # NOTE: eval() turns the enzyme names stored in suppliers_dict into the
    # module-level enzyme classes; the names come from the package's own
    # dictionary, not from user input.
    first += [eval(x) for n in suppliers for x in suppliers_dict[n][1]]
    set.__init__(self, first)
    self.mapping = dict.fromkeys(self)
    self.already_mapped = None
    # add_supplier() appends to this list and current_suppliers() reads it,
    # so it has to exist on every new batch.
    self.suppliers = suppliers
1958
1960 if len(self) < 5:
1961 return '+'.join(self.elements())
1962 else:
1963 return '...'.join(('+'.join(self.elements()[:2]),
1964 '+'.join(self.elements()[-2:])))
1965
1967 return 'RestrictionBatch(%s)' % self.elements()
1968
1975
1978
1981
def get(self, enzyme, add=False):
    """B.get(enzyme[, add]) -> enzyme class.

    enzyme is first normalised with B.format().
    If the result belongs to B it is returned.
    Otherwise, when add is true it is added to B and returned; when add
    is false (the default) a ValueError is raised.
    """
    found = self.format(enzyme)
    if found not in self:
        if not add:
            raise ValueError('enzyme %s is not in RestrictionBatch'
                             % found.__name__)
        self.add(found)
    return found
1999
def lambdasplit(self, func):
    """B.lambdasplit(func) -> RestrictionBatch .

    the new batch will contains only the enzymes for which
    func return True.
    """
    new = RestrictionBatch()
    # Fill the set itself.  The previous code only assigned a legacy
    # `_data` attribute, which a set-based batch never reads, so the
    # returned batch was always empty.
    for enzyme in filter(func, self):
        new.add_nocheck(enzyme)
    return new
2010
2012 """B.add_supplier(letter) -> add a new set of enzyme to B.
2013
2014 letter represents the suppliers as defined in the dictionary
2015 RestrictionDictionary.suppliers
2016 return None.
2017 raise a KeyError if letter is not a supplier code.
2018 """
2019 supplier = suppliers_dict[letter]
2020 self.suppliers.append(letter)
2021 for x in supplier[1]:
2022 self.add_nocheck(eval(x))
2023 return
2024
2026 """B.current_suppliers() -> add a new set of enzyme to B.
2027
2028 return a sorted list of the suppliers which have been used to
2029 create the batch.
2030 """
2031 suppl_list = sorted(suppliers_dict[x][0] for x in self.suppliers)
2032 return suppl_list
2033
2035 """ b += other -> add other to b, check the type of other."""
2036 self.add(other)
2037 return self
2038
2040 """ b + other -> new RestrictionBatch."""
2041 new = self.__class__(self)
2042 new.add(other)
2043 return new
2044
2046 """B.remove(other) -> remove other from B if other is a
2047 RestrictionType.
2048
2049 Safe set.remove method. Verify that other is a RestrictionType or can
2050 be evaluated to a RestrictionType.
2051 raise a ValueError if other can not be evaluated to a RestrictionType.
2052 raise a KeyError if other is not in B.
2053 """
2054 return set.remove(self, self.format(other))
2055
def add(self, other):
    """B.add(other) -> add other to B if other is a RestrictionType.

    Checked counterpart of set.add: other goes through B.format() first,
    and whatever B.format() returns is what gets stored.
    Any exception raised by B.format() propagates to the caller.
    """
    enzyme = self.format(other)
    return set.add(self, enzyme)
2064
2066 """B.add_nocheck(other) -> add other to B. don't check type of other.
2067 """
2068 return set.add(self, other)
2069
2087
2089 """B.is_restriction(y) -> bool.
2090
2091 True is y or eval(y) is a RestrictionType.
2092 """
2093 return (isinstance(y, RestrictionType) or
2094 isinstance(eval(str(y)), RestrictionType))
2095
def split(self, *classes, **flags):
    """B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

    Keep the enzymes of B that satisfy every condition.  Each class given
    positionally is one condition; the keyword argument named after the
    class tells which way it goes:
        <class.__name__>=True  (default) the enzyme must be a subclass,
        <class.__name__>=False           the enzyme must not be one.

    it works but it is slow, so it has really an interest when splitting
    over multiple conditions.
    """
    def splittest(element):
        # An element is rejected as soon as one condition disagrees.
        for klass in classes:
            wanted = flags.get(klass.__name__, True)
            if bool(issubclass(element, klass)) != bool(wanted):
                return False
        return True

    new = RestrictionBatch()
    # Fill the set itself.  The previous code only assigned a legacy
    # `_data` attribute, which a set-based batch never reads, so the
    # returned batch was always empty.
    for enzyme in filter(splittest, self):
        new.add_nocheck(enzyme)
    return new
2119
2121 """B.elements() -> tuple.
2122
2123 give all the names of the enzymes in B sorted alphabetically.
2124 """
2125 l = sorted(str(e) for e in self)
2126 return l
2127
2129 """B.as_string() -> list.
2130
2131 return a list of the name of the elements of B.
2132 """
2133 return [str(e) for e in self]
2134
2135 @classmethod
2137 """B.suppl_codes() -> dict
2138
2139 letter code for the suppliers
2140 """
2141 supply = dict((k, v[0]) for k, v in suppliers_dict.items())
2142 return supply
2143
2144 @classmethod
2146 """B.show_codes() -> letter codes for the suppliers"""
2147 supply = [' = '.join(i) for i in cls.suppl_codes().items()]
2148 print('\n'.join(supply))
2149 return
2150
def search(self, dna, linear=True):
    """B.search(dna) -> dict.

    Run enzyme.search() for every enzyme of B and return the dictionary
    {enzyme: result}, which is also kept in B.mapping.

    dna    -- a DNA instance, or an already built FormattedSeq.
    linear -- only used to wrap a DNA instance into a FormattedSeq; for a
              FormattedSeq its own `linear` attribute is used instead.

    The last (str(dna), linear) pair searched is remembered in
    B.already_mapped; searching it again returns B.mapping as is.
    Raise TypeError when dna is neither a DNA nor a FormattedSeq.
    """
    # presumably protects instances built without running __init__;
    # TODO confirm which code path creates them.
    if not hasattr(self, "already_mapped"):
        self.already_mapped = None
    needs_wrapping = isinstance(dna, DNA)
    if needs_wrapping:
        topology = linear
    elif isinstance(dna, FormattedSeq):
        topology = dna.linear
    else:
        raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
                        % type(dna))
    if (str(dna), topology) == self.already_mapped:
        return self.mapping
    # Record the request before the (possibly failing) search, as before.
    self.already_mapped = str(dna), topology
    fseq = FormattedSeq(dna, linear) if needs_wrapping else dna
    self.mapping = dict((enz, enz.search(fseq)) for enz in self)
    return self.mapping
2182
2183
2184
2185
2186
2187
2188
2189
2190 -class Analysis(RestrictionBatch, PrintFormat):
2191
2194 """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class.
2195
2196 For most of the method of this class if a dictionary is given it will
2197 be used as the base to calculate the results.
2198 If no dictionary is given a new analysis using the Restriction Batch
2199 which has been given when the Analysis class has been instantiated."""
2200 RestrictionBatch.__init__(self, restrictionbatch)
2201 self.rb = restrictionbatch
2202 self.sequence = sequence
2203 self.linear = linear
2204 if self.sequence:
2205 self.search(self.sequence, self.linear)
2206
2208 return 'Analysis(%s,%s,%s)' %\
2209 (repr(self.rb), repr(self.sequence), self.linear)
2210
2212 """A._sub_set(other_set) -> dict.
2213
2214 Internal use only.
2215
2216 screen the results through wanted set.
2217 Keep only the results for which the enzymes is in wanted set.
2218 """
2219 return dict((k, v) for k, v in self.mapping.items() if k in wanted)
2220
def _boundaries(self, start, end):
    """A._boundaries(start, end) -> tuple.

    Format the boundaries for use with the methods that limit the
    search to only part of the sequence given to analyse.

    A boundary lower than 1 is shifted by len(A.sequence), and the two
    values are swapped if needed so that start <= end.
    Return (start, end, test), where test(start, end, site) tells whether
    site lies in the region.
    Raise TypeError if start or end is not an int.
    """
    if not isinstance(start, int):
        raise TypeError('expected int, got %s instead' % type(start))
    if not isinstance(end, int):
        raise TypeError('expected int, got %s instead' % type(end))
    size = len(self.sequence)
    if start < 1:
        start += size
    if end < 1:
        end += size
    if end < start:
        start, end = end, start
    # Always return the triple.  With start == end the function used to
    # fall off its end and return None, which made every caller fail while
    # unpacking; an empty region simply matches no site.
    return start, end, self._test_normal
2241
2243 """A._test_normal(start, end, site) -> bool.
2244
2245 Internal use only
2246 Test if site is in between start and end.
2247 """
2248 return start <= site < end
2249
2251 """A._test_reverse(start, end, site) -> bool.
2252
2253 Internal use only
2254 Test if site is in between end and start (for circular sequences).
2255 """
2256 return start <= site <= len(self.sequence) or 1 <= site < end
2257
2266
def print_that(self, dct=None, title='', s1=''):
    """A.print_that([dct[, title[, s1]]]) -> print the results from dct.

    Print what A.format_output(dct, title, s1) returns; the three
    arguments are handed over unchanged.
    Kept for backwards compatibility.
    """
    report = self.format_output(dct, title, s1)
    print(report)
2276
def change(self, **what):
    """A.change(**attribute_name) -> Change attribute of Analysis.

    Accepted keywords:
        NameWidth, ConsoleWidth -- set the attribute, then recompute
                                   Cmodulo and PrefWidth from both.
        sequence                -- set it and search it again.
        rb                      -- re-initialise the analysis with the new
                                   batch, keeping sequence and linear.
        linear                  -- set it and search the sequence again.
        Indent, Maxsize         -- plain attribute assignment.
    Cmodulo and PrefWidth can not be set directly, and any other keyword
    is rejected; both cases raise AttributeError.

    It is possible to change the width of the shell by setting
    self.ConsoleWidth to what you want.
    self.NameWidth refer to the maximal length of the enzyme name.

    Changing one of these parameters here might not give the results
    you expect. In which case, you can settle back to a 80 columns shell
    or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
    you get it right.
    """
    for k, v in what.items():
        if k in ('NameWidth', 'ConsoleWidth'):
            setattr(self, k, v)
            self.Cmodulo = self.ConsoleWidth % self.NameWidth
            self.PrefWidth = self.ConsoleWidth - self.Cmodulo
        # Keywords are compared with ==; identity (`is`) against a string
        # literal only works by accident of interning.
        elif k == 'sequence':
            self.sequence = v
            self.search(self.sequence, self.linear)
        elif k == 'rb':
            # __init__ returns None, so its result must not be bound to
            # self: later keywords would then be applied to None.
            Analysis.__init__(self, v, self.sequence, self.linear)
        elif k == 'linear':
            self.linear = v
            self.search(self.sequence, v)
        elif k in ('Indent', 'Maxsize'):
            setattr(self, k, v)
        elif k in ('Cmodulo', 'PrefWidth'):
            raise AttributeError(
                'To change %s, change NameWidth and/or ConsoleWidth' % k)
        else:
            raise AttributeError('Analysis has no attribute %s' % k)
    return
2310
def full(self, linear=True):
    """A.full() -> dict.

    Full restriction map of the sequence, i.e. A.mapping as it stands.
    The linear argument is accepted but not used.
    """
    complete_map = self.mapping
    return complete_map
2317
def blunt(self, dct=None):
    """A.blunt([dct]) -> dict.

    Only the enzymes which have a blunt restriction site
    (those for which enzyme.is_blunt() is true).
    If dct is not given, or is empty, A.mapping is screened.
    """
    if not dct:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if k.is_blunt())
2326
2328 """A.overhang5([dct]) -> dict.
2329
2330 Only the enzymes which have a 5' overhang restriction site.
2331 """
2332 if not dct:
2333 dct = self.mapping
2334 return dict((k, v) for k, v in dct.items() if k.is_5overhang())
2335
2337 """A.Overhang3([dct]) -> dict.
2338
2339 Only the enzymes which have a 3'overhang restriction site.
2340 """
2341 if not dct:
2342 dct = self.mapping
2343 return dict((k, v) for k, v in dct.items() if k.is_3overhang())
2344
2346 """A.defined([dct]) -> dict.
2347
2348 Only the enzymes that have a defined restriction site in Rebase.
2349 """
2350 if not dct:
2351 dct = self.mapping
2352 return dict((k, v) for k, v in dct.items() if k.is_defined())
2353
2355 """A.with_sites([dct]) -> dict.
2356
2357 Enzymes which have at least one site in the sequence.
2358 """
2359 if not dct:
2360 dct = self.mapping
2361 return dict((k, v) for k, v in dct.items() if v)
2362
2364 """A.without_site([dct]) -> dict.
2365
2366 Enzymes which have no site in the sequence.
2367 """
2368 if not dct:
2369 dct = self.mapping
2370 return dict((k, v) for k, v in dct.items() if not v)
2371
2373 """A.With_N_Sites(N [, dct]) -> dict.
2374
2375 Enzymes which cut N times the sequence.
2376 """
2377 if not dct:
2378 dct = self.mapping
2379 return dict((k, v) for k, v in dct.items()if len(v) == N)
2380
2382 if not dct:
2383 dct = self.mapping
2384 return dict((k, v) for k, v in dct.items() if len(v) in list)
2385
def with_name(self, names, dct=None):
    """A.with_name(list_of_names [, dct]) ->

    Limit the search to the enzymes named in list_of_names.

    Names that are not in AllEnzymes are reported with a BiopythonWarning
    and left out.  Without dct, a new search of A.sequence is run with the
    remaining enzymes; with dct, only its entries for them are returned.
    """
    # Build a filtered copy.  Deleting from `names` while enumerating it
    # skipped the entry following each removal, changed the caller's list
    # and failed on tuples.
    known = []
    for enzyme in names:
        if enzyme in AllEnzymes:
            known.append(enzyme)
        else:
            warnings.warn("no data for the enzyme: %s" % enzyme,
                          BiopythonWarning)
    if not dct:
        return RestrictionBatch(known).search(self.sequence, self.linear)
    return dict((n, dct[n]) for n in known if n in dct)
2399
def with_site_size(self, site_size, dct=None):
    """A.with_site_size(site_size [, dct]) ->

    Limit the search to the enzymes whose site is of size <site_size>.

    Without dct, the enzymes of A with that site size are searched again
    in A.sequence, using A.linear.  With dct, only its entries whose
    enzyme has that site size are returned.
    """
    if not dct:
        sites = [name for name in self if name.size == site_size]
        # Pass the topology along, as with_name() does, so a circular
        # analysis is not silently searched as linear.
        return RestrictionBatch(sites).search(self.sequence, self.linear)
    # Compare sizes: the former `k in site_size` tested membership in an
    # integer and raised TypeError.
    return dict((k, v) for k, v in dct.items() if k.size == site_size)
2409
2411 """A.only_between(start, end[, dct]) -> dict.
2412
2413 Enzymes that cut the sequence only in between start and end.
2414 """
2415 start, end, test = self._boundaries(start, end)
2416 if not dct:
2417 dct = self.mapping
2418 d = dict(dct)
2419 for key, sites in dct.items():
2420 if not sites:
2421 del d[key]
2422 continue
2423 for site in sites:
2424 if test(start, end, site):
2425 continue
2426 else:
2427 del d[key]
2428 break
2429 return d
2430
def between(self, start, end, dct=None):
    """A.between(start, end [, dct]) -> dict.

    Enzymes with at least one site in the region delimited by start and
    end; their other sites may lie anywhere.
    The boundaries and the membership test come from A._boundaries().
    If dct is not given, or is empty, A.mapping is screened.
    """
    low, high, inside = self._boundaries(start, end)
    source = dct if dct else self.mapping
    return dict((enzyme, cuts) for enzyme, cuts in source.items()
                if any(inside(low, high, cut) for cut in cuts))
2448
2450 """A.show_only_between(start, end [, dct]) -> dict.
2451
2452 Enzymes that cut the sequence outside of the region
2453 in between start and end but do not cut inside.
2454 """
2455 d = []
2456 if start <= end:
2457 d = [(k, [vv for vv in v if start <= vv <= end])
2458 for k, v in self.between(start, end, dct).items()]
2459 else:
2460 d = [(k, [vv for vv in v if start <= vv or vv <= end])
2461 for k, v in self.between(start, end, dct).items()]
2462 return dict(d)
2463
2465 """A.only_outside(start, end [, dct]) -> dict.
2466
2467 Enzymes that cut the sequence outside of the region
2468 in between start and end but do not cut inside.
2469 """
2470 start, end, test = self._boundaries(start, end)
2471 if not dct:
2472 dct = self.mapping
2473 d = dict(dct)
2474 for key, sites in dct.items():
2475 if not sites:
2476 del d[key]
2477 continue
2478 for site in sites:
2479 if test(start, end, site):
2480 del d[key]
2481 break
2482 else:
2483 continue
2484 return d
2485
def outside(self, start, end, dct=None):
    """A.outside((start, end [, dct]) -> dict.

    Enzymes with at least one site that is not in the region delimited by
    start and end.  Whether they also cut inside it is not checked.
    The boundaries and the membership test come from A._boundaries().
    If dct is not given, or is empty, A.mapping is screened.
    """
    low, high, inside = self._boundaries(start, end)
    source = dct if dct else self.mapping
    return dict((enzyme, cuts) for enzyme, cuts in source.items()
                if not all(inside(low, high, cut) for cut in cuts))
2504
def do_not_cut(self, start, end, dct=None):
    """A.do_not_cut(start, end [, dct]) -> dict.

    Enzymes that do not cut the region in between start and end:
    the enzymes without any site, plus those returned by
    A.only_outside(start, end, dct).
    If dct is not given, or is empty, A.mapping is screened.
    """
    if not dct:
        dct = self.mapping
    # Screen the same dictionary in both steps; without_site() used to be
    # called with no argument and so always looked at the full mapping.
    d = self.without_site(dct)
    d.update(self.only_outside(start, end, dct))
    return d
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
# Module-level construction of the enzyme classes and of the three batches
# CommOnly, NonComm and AllEnzymes.
CommOnly = RestrictionBatch()
NonComm = RestrictionBatch()
# Each typedict value unpacks into (names of the base classes, names of the
# enzymes built on those bases).
for TYPE, (bases, enzymes) in typedict.items():
    # The base classes are stored as names; eval() resolves them to the
    # objects of this module.
    bases = tuple(eval(x) for x in bases)
    # T is created through the RestrictionType metaclass with those bases.
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes:
        # One object per enzyme name, fed with its enzymedict entry.
        # NOTE(review): presumably newenz is itself a class (an enzyme
        # type) rather than an instance -- confirm against RestrictionType.
        newenz = T(k, bases, enzymedict[k])
        # Sort the new enzyme by its is_comm() answer.  add_nocheck() is
        # used, so no format/type check is run here.
        if newenz.is_comm():
            CommOnly.add_nocheck(newenz)
        else:
            NonComm.add_nocheck(newenz)
# Union of both batches.
AllEnzymes = CommOnly | NonComm
# Publish every enzyme under its own name in the module namespace.
names = [str(x) for x in AllEnzymes]
try:
    # Under Python 2 the list comprehension above leaks its variable x;
    # under Python 3 it does not, hence the guarded delete.
    del x
except NameError:
    pass
# At module level locals() is the module namespace, so this defines one
# module attribute per enzyme name.
locals().update(dict(zip(names, AllEnzymes)))
__all__ = ('FormattedSeq', 'Analysis', 'RestrictionBatch', 'AllEnzymes',
           'CommOnly', 'NonComm') + tuple(names)
# Remove the loop helpers from the module namespace (T and newenz are left
# in place).
del k, enzymes, TYPE, bases, names
2598